#==============================================================================#
# Explaining Support for COVID-19 Cell Phone Contact Tracing (CJPS)
# Replication Data
#==============================================================================#
library(dplyr) 
library(ggplot2)
library(ggpubr)
library(ggsci)
library(ggstance)
library(ggthemes)
library(dotwhisker)
library(broom)
library(stargazer)
library(margins)
library(survey)
library(aod)
library(mlogit)
library(ordinal)
library(QuantPsyc)
library(psych)

df = read.csv("COVIDApps.csv")
survey_design = svydesign(id=~LimeID, weights=~weight, data=df)

#==============================================================================#
# Distribution of answers to COVID-19 app question (Figure 1)
#==============================================================================#

pdf("figures/f1.pdf", width=7, height=5, paper='special')
ggplot(df, aes(x = covid_apps, fill=covid_apps, colour=covid_apps)) +  
  geom_bar(aes(y= (..count..)/sum(..count..)*100),size=0.5,width = 0.75) +
  scale_fill_jco(alpha=0.4) + scale_color_jco() + theme_pubr() +
  theme(text = element_text(size=20),legend.position="none") +
  ylab("Sample percentage") + xlab("") +
  annotate("text", x=1, y=18, label="15.1%", size=6) +
  annotate("text", x=2, y=50, label="47.3%", size=6) +
  annotate("text", x=3, y=40, label="37.6%", size=6) 
dev.off()

#==============================================================================#
# Unconditional support for COVID-19 apps, by treatment group (Figure 2)
#==============================================================================#

pdf("figures/f2.pdf", width=7, height=5, paper='special')
ggbarplot(df, y="cellphone", x="treatment1", add = "mean_se", color = "treatment1",
          ylab="Support for Mandatory COVID-19 App", ylim=c(0.0,0.5), 
          palette="jco", fill="treatment1",
          lab.size=16, position = position_dodge(0.9), alpha=0.4, 
          xlab="Treatment", ggtheme = theme_pubr()) +
  scale_x_discrete(labels=c("Control", "Non-Compliers", "Large Infection Rate")) +
  rremove("legend")
dev.off()

prop.test(table(df$treatment1, df$cellphone))$p.value
prop.test(table(df$lockdown, df$cellphone))$p.value
prop.test(table(df$minister, df$cellphone))$p.value

#==============================================================================#
# Main models SATE (Figure 3, Table A2)
#==============================================================================#
f1 = cellphone ~ treatment1 + notserious + worried + trudeau + 
  lostjob + above56 + female + Atlantic + BC + Prairies + Quebec 
f2 = cellphone ~ treatment1 + dv_wording + notserious + worried + 
  trudeau + lostjob + above56 + female + Atlantic + BC + Prairies + Quebec

model1 = glm(f1, data = df, family=binomial("logit"))
# Use quasi-binomial family to avoid warning message when using weights
# (results identical to family=binomial).
model1w = svyglm(f1, design=survey_design, family=quasibinomial("logit")) 
model2w = svyglm(f2, design=survey_design, family=quasibinomial("logit"))
ClassLog(model1, resp=df$cellphone)$overall
ClassLog(model1w, resp=df$cellphone)$overall
ClassLog(model2w, resp=df$cellphone)$overall

# Change in predicted probabilities
# (Simulations using observed values for remaining covariates)
m1m = margins(model1, change="minmax")
m2mw = margins(model2w, change="minmax", design=survey_design)
m1mw = margins(model1w, change="minmax", design=survey_design)

# Figure 3
brackets = list(c("Treatment", "Non-Compliers", "Large Infection Rate"),
                c("Covariates", "Not Serious Enough", "Female"),
                c("Question Wording", "Health Agency", "Dilemma"))

models = bind_rows(tidy(m2mw) %>% 
                    filter(term!= "Atlantic" &
                             term !="BC"  &
                             term !="Prairies"  &
                             term !="Quebec") %>% 
                    mutate(model = "Weighted and Wordings"),
                   tidy(m1mw) %>% 
                     filter(term!= "Atlantic" &
                              term !="BC"  &
                              term !="Prairies"  &
                              term !="Quebec") %>% 
                     mutate(model = "Weighted"),
                   tidy(m1m) %>% 
                     filter(term!= "Atlantic" &
                              term !="BC"  &
                              term !="Prairies"  &
                              term !="Quebec") %>% 
                     mutate(model = "Unweighted"))
       
pdf("figures/f3.pdf", width=9, height=7, paper='special')
{dwplot(models, dodge_size = 0.7, 
        vline = geom_vline(xintercept=0,color="grey",lty=2),
        dot_args = list(size = 3),
        dist_args = list(alpha = 0.75, size = 3))  %>%
    relabel_predictors(c(treatment1lockdown = "Non-Compliers",                       
                         treatment1minister = "Large Infection Rate",
                         notserious = "Not Serious Enough",
                         worried = "Worried", 
                         trudeau = "Trudeau Approval", 
                         lostjob = "Lost Job",
                         above56 = "Above 56 Years Old",
                         female = "Female",
                       dv_wordinghealth_agency = "Health Agency",
                       dv_wordingdilemma = "Dilemma")) +
    theme_pubr() +
    theme(legend.position="none") +
    scale_color_colorblind(name="Model") + 
    xlim(c(-0.2,0.55)) +
    annotate("segment", x=0.25, xend=0.3, y=1.5, yend=1.5, size=0.75, color="#E69F00") +
    annotate("segment", x=0.25, xend=0.3, y=2, yend=2, size=0.75,color="#56B4E9") +
    annotate("segment", x=0.25, xend=0.3, y=1, yend=1, size=0.75,color="#000000") +
    annotate("point", x=0.275, y=1.5, size=3,color="#E69F00") +
    annotate("point", x=0.275, y=2, size=3,color="#56B4E9") +
    annotate("point", x=0.275, y=1, size=3,color="#000000") +
    annotate("text", x=0.26, y=2.5, label="Model", size=3, hjust = 0) +
    annotate("text", x=0.31, y=2, label="Unweighted",size=3.5,hjust = 0) +
    annotate("text", x=0.31, y=1.5, label="Weighted",size=3.5,hjust = 0) +
    annotate("text", x=0.31, y=1, label="Wording Control",size=3.5,hjust = 0)} %>%
  add_brackets(brackets)
dev.off()

# Full models, Appendix Table A1
covariate.labels = c("Non-Compliers",
                     "Large Infection Rate", 
                     "Dilemma Wording",
                     "Health Agency Wording",
                     "Not Serious Enough",
                     "Worried",
                     "Trudeau Approval",
                     "Lost Job",
                     "Above 56 Years Old",
                     "Female",
                     "Atlantic (Base = Ontario)",
                     "British Columbia (Base = Ontario)",
                     "Prairies (Base = Ontario)",
                     "Québec (Base = Ontario)",
                     "Constant")

sink("tables/tableA2.txt")
cat("Table A2: Explaining Support for COVID-19 Cell Phone Contact Tracing (Full Results)\n")
stargazer(model1, model1w, model2w, 
          star.cutoffs = c(0.05,0.01,0.001), 
          covariate.labels=covariate.labels,
          dep.var.caption = "COVID-19 Contact Tracing Apps = Yes",
          dep.var.labels = "",
          model.names=F,
          omit.stat=c("ll","aic"),
          header=FALSE,
          column.labels = c("Unweighted", "Weighted", "Wording Control"),
          type="latex")
sink(NULL)

#==============================================================================#
# Testing that categories can be collapsed (Wald test)
#==============================================================================#
df_long = mlogit.data(df,"covid_apps",shape="wide")

f3 = covid_apps ~ 1 | treatment1 + notserious +  worried + trudeau + lostjob + above56 +  
  female + Atlantic + BC + Prairies + Quebec 

# Fitting multinomial logistic models
multinom1 = mlogit(f3, data = df_long, reflevel="Only if voluntary")
multinom2 = mlogit(f3, data = df_long, reflevel="Only if voluntary", weight=weight)

# Wald test for combining "Only if voluntary" and "No" outcome categories
# H0: No difference between categories
# (equivalent of `mlogtest, combine` in Stata)
aod::wald.test(b = coef(multinom1), Sigma = vcov(multinom1), Terms = seq(3, 25, by = 2))
aod::wald.test(b = coef(multinom2), Sigma = vcov(multinom2), Terms = seq(3, 25, by = 2))

# Treatment only
aod::wald.test(b = coef(multinom1), Sigma = vcov(multinom1), Terms = c(3,5))
aod::wald.test(b = coef(multinom2), Sigma = vcov(multinom2), Terms = c(3,5))

#==============================================================================#
# Testing ordered logistic regressions
#==============================================================================#
df$ordered = factor(df$covid_apps, 
                     ordered=TRUE, 
                     levels = c("No","Only if voluntary","Yes"))

ordinal_f = ordered ~ treatment1 + notserious +  worried + trudeau + lostjob + above56 +  
  female + Atlantic + BC + Prairies + Quebec 

ologit = clm(ordinal_f, data = df, link="logit", weight=weight)
summary(ologit)
# Result for H1 holds, but weaker

nominal_test(ologit)
# Parallel regression assumption (proportional odds) fails for treatment variable
# Ordinal model is not appropriate

#==============================================================================#
# Proportion of arguments: Open-ended question (Table 1)
#==============================================================================#
sink("tables/table1.txt")
cat("Table 1: Most Frequent Arguments about COVID-19 Apps\n")
for (i in 29:35){
  cat(names(df)[i], ": ", round((table(df[,i])/1200)[2]*100,1), "%\n", sep="")
}
cat("\nSignificance test (p-value): Reduce risk arguments and support for cell phone contact tracing\n")
print(prop.test(table(df$infected, df$cellphone))$p.value)
sink(NULL)

#==============================================================================#
# Open-ended response models (Figure 4, Table A3)
#==============================================================================#

f4 = choice ~ treatment1 + notserious + worried + trudeau + 
  lostjob + above56 + female + Atlantic + BC + Prairies + Quebec 
f5 = infected~ treatment1 + notserious + worried + 
  trudeau + lostjob + above56 + female + Atlantic + BC + Prairies + Quebec

model4w = svyglm(f4, design=survey_design, family=quasibinomial("logit")) 
model5w = svyglm(f5, design=survey_design, family=quasibinomial("logit"))
ClassLog(model4w, resp=df$choice)$overall
ClassLog(model5w, resp=df$infected)$overall

# Change in predicted probabilities
# (Simulations using observed values for remaining covariates)
m4mw = margins(model4w, change="minmax", design=survey_design)
m5mw = margins(model5w, change="minmax", design=survey_design)

covariate.labels = c("Non-Compliers",
                     "Large Infection Rate", 
                     "Not Serious Enough",
                     "Worried",
                     "Trudeau Approval",
                     "Lost Job",
                     "Above 56 Years Old",
                     "Female",
                     "Atlantic (Base = Ontario)",
                     "British Columbia (Base = Ontario)",
                     "Prairies (Base = Ontario)",
                     "Québec (Base = Ontario)",
                     "Constant")

sink("tables/tableA3.txt")
cat("Table A3: Determinants of Arguments on COVID-19 Apps (Full Results)\n")
stargazer(model5w, model4w, 
          star.cutoffs = c(0.05,0.01,0.001), 
          covariate.labels=covariate.labels,
          dep.var.caption = "",
          dep.var.labels = c("",""),
          column.labels = c("Others as a threat", "With conditions"))
sink(NULL)

# Figure 4
brackets = list(c("Treatment", "Non-Compliers", "Large Infection Rate"),
                c("Covariates", "Not Serious Enough", "Female"))

models = bind_rows(tidy(m5mw) %>% 
                     filter(term!= "Atlantic" &
                              term !="BC"  &
                              term !="Prairies"  &
                              term !="Quebec") %>% 
                     mutate(model = "Infected"),
                   tidy(m4mw) %>% 
                     filter(term!= "Atlantic" &
                              term !="BC"  &
                              term !="Prairies"  &
                              term !="Quebec") %>% 
                     mutate(model = "With Conditions"))

pdf("figures/f4.pdf", width=9, height=7, paper='special')
{dwplot(models, dodge_size = 0.7, 
        vline = geom_vline(xintercept=0,color="grey",lty=2),
        dot_args = list(size = 3),
        dist_args = list(alpha = 0.75, size = 3))  %>%
    relabel_predictors(c(treatment1lockdown = "Non-Compliers",                       
                         treatment1minister = "Large Infection Rate",
                         notserious = "Not Serious Enough",
                         worried = "Worried", 
                         trudeau = "Trudeau Approval", 
                         lostjob = "Lost Job",
                         above56 = "Above 56 Years Old",
                         female = "Female")) +
    theme_pubr() +
    theme(legend.position="none") +
    scale_color_colorblind(name="Model") + 
    xlim(c(-0.2,0.55)) +
    annotate("segment", x=0.25, xend=0.3, y=1.5, yend=1.5, size=0.75, color="#E69F00") +
    annotate("segment", x=0.25, xend=0.3, y=1, yend=1, size=0.75,color="#000000") +
    annotate("point", x=0.275, y=1.5, size=3,color="#E69F00") +
    annotate("point", x=0.275, y=1, size=3,color="#000000") +
    annotate("text", x=0.26, y=2, label="Dependent Variable", size=3, hjust = 0) +
    annotate("text", x=0.31, y=1.5, label="With Conditions",size=3.5,hjust = 0) +
    annotate("text", x=0.31, y=1, label="Others as a Threat",size=3.5,hjust = 0)} %>%
  add_brackets(brackets)
dev.off()

#==============================================================================#
# Alternative specifications (Table A4)
#==============================================================================#

alt1 = cellphone ~ treatment1 
alt2 = cellphone ~ treatment1 + dv_wording + tougherpolicy + notserious + worried + 
  trudeau + lostjob + above56 + female + relevel(region, ref="Ontario")

malt1 = svyglm(alt1, design=survey_design, family=gaussian)
malt2 = svyglm(alt2, design=survey_design, family=gaussian)
malt3 = svyglm(alt1, design=survey_design, family=quasibinomial(link="logit"))
malt4 = svyglm(alt2, design=survey_design, family=quasibinomial(link="logit"))

covariate.alt = c("Non-Compliers",
                  "Large Infection Rate", 
                  "Dilemma Wording",
                  "Health Agency",
                  "Tougher Policy Support",
                  "Not Serious Enough",
                  "Worried",
                  "Trudeau Approval",
                  "Lost Job",
                  "Above 56 Years Old",
                  "Female",
                  "Atlantic (Base = Ontario)",
                  "British Columbia (Base = Ontario)",
                  "Prairies (Base = Ontario)",
                  "Québec (Base = Ontario)",
                  "Constant")

sink("tables/tableA4.txt")
cat("Table A4: Alternative Specifications (Treatment Effects)\n")
stargazer(malt1, malt2, malt3, malt4, 
          star.cutoffs = c(0.05,0.01,0.001), 
          covariate.labels=covariate.alt,
          dep.var.caption = "COVID-19 Contact Tracing Apps = Yes",
          dep.var.labels = "",
          model.names=F,
          omit.stat=c("ll","aic"),
          header=FALSE,
          column.labels = c("Gaussian", "Gaussian", "Logistic", "Logistic"),
          type="latex")
sink(NULL)